{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Directory that contains the Muta3DMaps package. Set the MUTA3DMAPS_HOME\n",
    "# environment variable to point elsewhere instead of editing this cell; the\n",
    "# default keeps the path this notebook was originally run with.\n",
    "MUTA3DMAPS_HOME = os.environ.get('MUTA3DMAPS_HOME', r'C:\\GitWorks\\Muta3DMaps')\n",
    "if MUTA3DMAPS_HOME not in sys.path:  # re-running the cell must not add duplicates\n",
    "    sys.path.append(MUTA3DMAPS_HOME)\n",
    "\n",
    "# Must come after the sys.path tweak above, which makes the package importable.\n",
    "from Muta3DMaps.core.pdbe.decode import ProcessSIFTS\n",
    "\n",
    "# NOTE(review): presumably sets up the 'ProcessSIFTS' logger seen in later cell output - confirm.\n",
    "ProcessSIFTS.init_logger()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Presumably PDB entry IDs; a later cell passes this list to\n",
    "# ProcessSIFTS.main as its related_pdb argument.\n",
    "SARS_CoV_2_pdbLyst = [\n",
    "    '5r7y', '5r7z', '5r80', '5r81', '5r82', '5r83', '5r84',\n",
    "    '6lu7', '6lvn', '6lxt',\n",
    "    '6m03', '6m17',\n",
    "    '6vsb', '6vw1', '6vww', '6vxx', '6vyb', '6vyo',\n",
    "    '6w01', '6w02',\n",
    "    '6y2e', '6y2f', '6y2g', '6y84',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:31<00:00, 31.16s/it]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00,  1.14it/s]\n",
      "2020-03-14 19:01:51,141 ProcessSIFTS INFO 4 ids downloaded in 3.53s\n"
     ]
    }
   ],
   "source": [
    "# filePath and folder are given the same PDBe annotation directory.\n",
    "# NOTE(review): the saved stderr output shows progress bars and an\n",
    "# 'ids downloaded' log line, so this call appears to fetch data - confirm.\n",
    "res = ProcessSIFTS.main(\n",
    "    filePath='../Annotations/PDBe',\n",
    "    folder='../Annotations/PDBe',\n",
    "    related_pdb=SARS_CoV_2_pdbLyst,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Skip the None entries of res, run every remaining one through\n",
    "# reformat and then dealWithInDe, and stack the results under a\n",
    "# freshly numbered index.\n",
    "sift_dfrm = pd.concat(\n",
    "    (\n",
    "        ProcessSIFTS.dealWithInDe(ProcessSIFTS.reformat(path))\n",
    "        for path in res\n",
    "        if path is not None\n",
    "    ),\n",
    "    sort=False,\n",
    "    ignore_index=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         Q695T7\n",
       "2         Q9BYF1\n",
       "6         D5HJT4\n",
       "16    A0A2R3SV02\n",
       "Name: Entry, dtype: object"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values of the Entry column; the first occurrence of each is\n",
    "# kept together with its original row label.\n",
    "sift_dfrm['Entry'].drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Entry</th>\n",
       "      <td>Q695T7</td>\n",
       "      <td>Q9BYF1</td>\n",
       "      <td>D5HJT4</td>\n",
       "      <td>A0A2R3SV02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Length</th>\n",
       "      <td>634</td>\n",
       "      <td>805</td>\n",
       "      <td>1242</td>\n",
       "      <td>7092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Metal binding</th>\n",
       "      <td>NaN</td>\n",
       "      <td>METAL 374;  /note=\"Zinc; catalytic\"; METAL 378...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nucleotide binding</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cofactor</th>\n",
       "      <td>NaN</td>\n",
       "      <td>COFACTOR: Name=Zn(2+); Xref=ChEBI:CHEBI:29105;...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Binding site</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BINDING 169;  /note=\"Chloride\"; BINDING 273;  ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Active site</th>\n",
       "      <td>NaN</td>\n",
       "      <td>ACT_SITE 375; ACT_SITE 505</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Entry name</th>\n",
       "      <td>S6A19_HUMAN</td>\n",
       "      <td>ACE2_HUMAN</td>\n",
       "      <td>D5HJT4_BCHK3</td>\n",
       "      <td>A0A2R3SV02_CVHSA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gene names</th>\n",
       "      <td>SLC6A19 B0AT1</td>\n",
       "      <td>ACE2 UNQ868/PRO1885</td>\n",
       "      <td>S</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Organism</th>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>Homo sapiens (Human)</td>\n",
       "      <td>Bat SARS coronavirus HKU3-7</td>\n",
       "      <td>Bat SARS-like coronavirus</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Absorption</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Activity regulation</th>\n",
       "      <td>NaN</td>\n",
       "      <td>ACTIVITY REGULATION: Activated by chloride and...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Calcium binding</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Catalytic activity</th>\n",
       "      <td>NaN</td>\n",
       "      <td>CATALYTIC ACTIVITY: Reaction=angiotensin II + ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CATALYTIC ACTIVITY: Reaction=ATP + H2O = ADP +...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DNA binding</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EC number</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3.4.17.23</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Function [CC]</th>\n",
       "      <td>FUNCTION: Transporter that mediates resorption...</td>\n",
       "      <td>FUNCTION: Carboxypeptidase which converts angi...</td>\n",
       "      <td>FUNCTION: Spike protein S1: attaches the virio...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kinetics</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BIOPHYSICOCHEMICAL PROPERTIES:  Kinetic parame...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pathway</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pH dependence</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BIOPHYSICOCHEMICAL PROPERTIES:  pH dependence:...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Redox potential</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rhea Ids</th>\n",
       "      <td>NaN</td>\n",
       "      <td>RHEA:26554</td>\n",
       "      <td>NaN</td>\n",
       "      <td>RHEA:13065; RHEA:21248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Site</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>SITE 654..655;  /note=\"Cleavage\";  /evidence=\"...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Temperature dependence</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Annotation</th>\n",
       "      <td>5 out of 5</td>\n",
       "      <td>5 out of 5</td>\n",
       "      <td>4 out of 5</td>\n",
       "      <td>3 out of 5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Caution</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CAUTION: Lacks conserved residue(s) required f...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Features</th>\n",
       "      <td>Chain (1); Glycosylation (5); Modified residue...</td>\n",
       "      <td>Active site (2); Alternative sequence (2); Bet...</td>\n",
       "      <td>Chain (2); Coiled coil (2); Disulfide bond (2)...</td>\n",
       "      <td>Compositional bias (1); Domain (6); Region (1)...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                        0  \\\n",
       "Entry                                                              Q695T7   \n",
       "Length                                                                634   \n",
       "Metal binding                                                         NaN   \n",
       "Nucleotide binding                                                    NaN   \n",
       "Cofactor                                                              NaN   \n",
       "Binding site                                                          NaN   \n",
       "Active site                                                           NaN   \n",
       "Entry name                                                    S6A19_HUMAN   \n",
       "Gene names                                                  SLC6A19 B0AT1   \n",
       "Organism                                             Homo sapiens (Human)   \n",
       "Absorption                                                            NaN   \n",
       "Activity regulation                                                   NaN   \n",
       "Calcium binding                                                       NaN   \n",
       "Catalytic activity                                                    NaN   \n",
       "DNA binding                                                           NaN   \n",
       "EC number                                                             NaN   \n",
       "Function [CC]           FUNCTION: Transporter that mediates resorption...   \n",
       "Kinetics                                                              NaN   \n",
       "Pathway                                                               NaN   \n",
       "pH dependence                                                         NaN   \n",
       "Redox potential                                                       NaN   \n",
       "Rhea Ids                                                              NaN   \n",
       "Site                                                                  NaN   \n",
       "Temperature dependence                                                NaN   \n",
       "Annotation                                                     5 out of 5   \n",
       "Caution                                                               NaN   \n",
       "Features                Chain (1); Glycosylation (5); Modified residue...   \n",
       "\n",
       "                                                                        1  \\\n",
       "Entry                                                              Q9BYF1   \n",
       "Length                                                                805   \n",
       "Metal binding           METAL 374;  /note=\"Zinc; catalytic\"; METAL 378...   \n",
       "Nucleotide binding                                                    NaN   \n",
       "Cofactor                COFACTOR: Name=Zn(2+); Xref=ChEBI:CHEBI:29105;...   \n",
       "Binding site            BINDING 169;  /note=\"Chloride\"; BINDING 273;  ...   \n",
       "Active site                                    ACT_SITE 375; ACT_SITE 505   \n",
       "Entry name                                                     ACE2_HUMAN   \n",
       "Gene names                                            ACE2 UNQ868/PRO1885   \n",
       "Organism                                             Homo sapiens (Human)   \n",
       "Absorption                                                            NaN   \n",
       "Activity regulation     ACTIVITY REGULATION: Activated by chloride and...   \n",
       "Calcium binding                                                       NaN   \n",
       "Catalytic activity      CATALYTIC ACTIVITY: Reaction=angiotensin II + ...   \n",
       "DNA binding                                                           NaN   \n",
       "EC number                                                       3.4.17.23   \n",
       "Function [CC]           FUNCTION: Carboxypeptidase which converts angi...   \n",
       "Kinetics                BIOPHYSICOCHEMICAL PROPERTIES:  Kinetic parame...   \n",
       "Pathway                                                               NaN   \n",
       "pH dependence           BIOPHYSICOCHEMICAL PROPERTIES:  pH dependence:...   \n",
       "Redox potential                                                       NaN   \n",
       "Rhea Ids                                                       RHEA:26554   \n",
       "Site                                                                  NaN   \n",
       "Temperature dependence                                                NaN   \n",
       "Annotation                                                     5 out of 5   \n",
       "Caution                                                               NaN   \n",
       "Features                Active site (2); Alternative sequence (2); Bet...   \n",
       "\n",
       "                                                                        2  \\\n",
       "Entry                                                              D5HJT4   \n",
       "Length                                                               1242   \n",
       "Metal binding                                                         NaN   \n",
       "Nucleotide binding                                                    NaN   \n",
       "Cofactor                                                              NaN   \n",
       "Binding site                                                          NaN   \n",
       "Active site                                                           NaN   \n",
       "Entry name                                                   D5HJT4_BCHK3   \n",
       "Gene names                                                              S   \n",
       "Organism                                      Bat SARS coronavirus HKU3-7   \n",
       "Absorption                                                            NaN   \n",
       "Activity regulation                                                   NaN   \n",
       "Calcium binding                                                       NaN   \n",
       "Catalytic activity                                                    NaN   \n",
       "DNA binding                                                           NaN   \n",
       "EC number                                                             NaN   \n",
       "Function [CC]           FUNCTION: Spike protein S1: attaches the virio...   \n",
       "Kinetics                                                              NaN   \n",
       "Pathway                                                               NaN   \n",
       "pH dependence                                                         NaN   \n",
       "Redox potential                                                       NaN   \n",
       "Rhea Ids                                                              NaN   \n",
       "Site                    SITE 654..655;  /note=\"Cleavage\";  /evidence=\"...   \n",
       "Temperature dependence                                                NaN   \n",
       "Annotation                                                     4 out of 5   \n",
       "Caution                 CAUTION: Lacks conserved residue(s) required f...   \n",
       "Features                Chain (2); Coiled coil (2); Disulfide bond (2)...   \n",
       "\n",
       "                                                                        3  \n",
       "Entry                                                          A0A2R3SV02  \n",
       "Length                                                               7092  \n",
       "Metal binding                                                         NaN  \n",
       "Nucleotide binding                                                    NaN  \n",
       "Cofactor                                                              NaN  \n",
       "Binding site                                                          NaN  \n",
       "Active site                                                           NaN  \n",
       "Entry name                                               A0A2R3SV02_CVHSA  \n",
       "Gene names                                                            NaN  \n",
       "Organism                                        Bat SARS-like coronavirus  \n",
       "Absorption                                                            NaN  \n",
       "Activity regulation                                                   NaN  \n",
       "Calcium binding                                                       NaN  \n",
       "Catalytic activity      CATALYTIC ACTIVITY: Reaction=ATP + H2O = ADP +...  \n",
       "DNA binding                                                           NaN  \n",
       "EC number                                                             NaN  \n",
       "Function [CC]                                                         NaN  \n",
       "Kinetics                                                              NaN  \n",
       "Pathway                                                               NaN  \n",
       "pH dependence                                                         NaN  \n",
       "Redox potential                                                       NaN  \n",
       "Rhea Ids                                           RHEA:13065; RHEA:21248  \n",
       "Site                                                                  NaN  \n",
       "Temperature dependence                                                NaN  \n",
       "Annotation                                                     3 out of 5  \n",
       "Caution                                                               NaN  \n",
       "Features                Compositional bias (1); Domain (6); Region (1)...  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tab-separated UniProt annotation table stored next to the PDBe annotations.\n",
    "uniprotMeta = pd.read_csv(\n",
    "    '../Annotations/UniProt/uniprot_Q695T7_Q9BYF1_D5HJT4_A0A2R3SV02.tab',\n",
    "    sep='\\t',\n",
    ")\n",
    "# Transposed for display: one column per table row, one row per field.\n",
    "uniprotMeta.transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
